Extracting hourly temperature data from NOAA ISD (Integrated Surface Database) weather data

ish_parser python module is from: https://github.com/haydenth/ish_parser


In [1]:
# Boilerplate imports and display setup for the whole run.
# NOTE(review): several of these names (sys, np, mpl, sns, ish_parser, gzip, ftplib, io,
# display, HTML) are not referenced by this notebook's own cells; presumably they are
# consumed by the sub-notebooks executed with `%run -i`, which share this namespace —
# confirm before removing any of them.
import sys
import os

import numpy as np
import matplotlib as mpl
#mpl.use('nbagg')
import matplotlib.pyplot as plt
#from mpl_toolkits.mplot3d import Axes3D

#import mpld3 # for outputting interactive html figures

import pandas as pd
import seaborn as sns

# ISD/ISH record parsing plus the modules imported alongside it
import ish_parser
import gzip
import ftplib
import io
import dateutil

from IPython.display import display, HTML
# Select the interactive 'notebook' matplotlib backend and the notebook plot style
%matplotlib notebook
plt.style.use('seaborn-notebook')

# Show every column when a DataFrame is displayed (no truncation)
pd.set_option('display.max_columns', None)

Parameters

The main parameter, `RUNSETS`, is a list of run sets: one weather-station callsign together with its fixed UTC offset, start date, and end date.


In [2]:
# PARAMETERS

# One entry per station to process. Each entry is unpacked by the loops further down as
# (callsign, UTC offset, start_date, end_date). The offset is a signed 'HH:MM' string that
# is turned into a fixed (no daylight-saving) timezone when the trimmed CSV is written.
RUNSETS = [ # callsign, timezone, start_date, end_date
           ['KSFO','-08:00', '1950-01-01','2016-01-01'],#
           ['KFAT','-08:00', '1950-01-01','2016-01-01'],#
           ['KJAX','-05:00', '1950-01-01','2016-01-01'],#
           ['KBUR','-08:00', '1973-01-01','2016-01-01'],#
           ['KIAH','-06:00', '1970-01-01','2016-01-01'],#
           ['KLAX','-08:00', '1950-01-01','2016-01-01'],#
           ['KMCO','-05:00', '1973-01-01','2016-01-01'],#
           ['KRIV','-08:00', '1950-01-01','2016-01-01'],#
           ['KTPA','-05:00', '1950-01-01','2016-01-01'],#
           ['KSAN','-08:00', '1950-01-01','2016-01-01'],#
           ['KMIA','-05:00', '1950-01-01','2016-01-01'],#
          ]

# Directory handed to the sub-notebooks as DATADIR (per-station ISD .h5 files land here)
TEMPERATURE_DATADIR = '../data/temperatures/ISD'
# Directory handed to the sub-notebooks as OUTDIR; the cleaned .h5 and trimmed .csv live here
TEMPERATURE_OUTDIR = '../data/temperatures'
# Not read in this notebook; presumably tells the sub-notebooks not to show figures — TODO confirm
SUPPRESS_FIGURE_DISPLAY = True

## parameters used in fetching

USE_CACHED_STATION_H5_FILES = True # Don't download new temperature files unless required
# FTP server name; not used in this notebook itself (presumably by the fetching sub-notebook)
FTPHOST = 'ftp.ncdc.noaa.gov'
# Not read in this notebook; presumably controls (re)downloading the station list — TODO confirm
FETCH_STATIONS_LIST_FILE = True

## parameters used in cleaning

TEMP_COL = 'AT' # The label of the hourly temperature column we make/output

# Resampling and interpolation parameters
# spline order used for converting to on-the-hour and filling small gaps
BASE_INTERPOLATION_K = 1 # 1 for linear interpolation
# give special treatment to data gaps longer than...
POTENTIALLY_PROBLEMATIC_GAP_SIZE = pd.Timedelta('03:00:00')

Loop through all the sites/stations and run the sub-scripts

Most of the time is actually consumed saving figure files


In [3]:
# Run the fetch/parse and the cleaning sub-notebooks once for every entry in RUNSETS.
for STATION_CALLSIGN, LOCAL_TIME_OFFSET, START_DATE, END_DATE in RUNSETS:
    # `%run -i` executes each sub-notebook in this notebook's own namespace, so the loop
    # variables above and the names assigned below are visible to it. Presumably the
    # sub-notebooks read them as their inputs — confirm against those notebooks.
    SUBNOTEBOOK_FLAG = True
    DATADIR = TEMPERATURE_DATADIR
    OUTDIR = TEMPERATURE_OUTDIR
    # Order matters: fetching/parsing runs first, cleaning second.
    %run -i "Fetching and parsing ISH.ipynb"
    %run -i "Cleaning temperatures.ipynb"


Fetching and parsing  KBUR
#### 722880 23152 range(1943, 2018)
/pub/data/noaa/1943/722880-23152-1943.gz
/pub/data/noaa/1944/722880-23152-1944.gz
/pub/data/noaa/1945/722880-23152-1945.gz
/pub/data/noaa/1946/722880-23152-1946.gz
ERROR: 550 /pub/data/noaa/1946/722880-23152-1946.gz: No such file or directory
/pub/data/noaa/1947/722880-23152-1947.gz
ERROR: 550 /pub/data/noaa/1947/722880-23152-1947.gz: No such file or directory
/pub/data/noaa/1948/722880-23152-1948.gz
ERROR: 550 /pub/data/noaa/1948/722880-23152-1948.gz: No such file or directory
/pub/data/noaa/1949/722880-23152-1949.gz
ERROR: 550 /pub/data/noaa/1949/722880-23152-1949.gz: No such file or directory
/pub/data/noaa/1950/722880-23152-1950.gz
ERROR: 550 /pub/data/noaa/1950/722880-23152-1950.gz: No such file or directory
/pub/data/noaa/1951/722880-23152-1951.gz
ERROR: 550 /pub/data/noaa/1951/722880-23152-1951.gz: No such file or directory
/pub/data/noaa/1952/722880-23152-1952.gz
ERROR: 550 /pub/data/noaa/1952/722880-23152-1952.gz: No such file or directory
/pub/data/noaa/1953/722880-23152-1953.gz
ERROR: 550 /pub/data/noaa/1953/722880-23152-1953.gz: No such file or directory
/pub/data/noaa/1954/722880-23152-1954.gz
ERROR: 550 /pub/data/noaa/1954/722880-23152-1954.gz: No such file or directory
/pub/data/noaa/1955/722880-23152-1955.gz
ERROR: 550 /pub/data/noaa/1955/722880-23152-1955.gz: No such file or directory
/pub/data/noaa/1956/722880-23152-1956.gz
ERROR: 550 /pub/data/noaa/1956/722880-23152-1956.gz: No such file or directory
/pub/data/noaa/1957/722880-23152-1957.gz
ERROR: 550 /pub/data/noaa/1957/722880-23152-1957.gz: No such file or directory
/pub/data/noaa/1958/722880-23152-1958.gz
ERROR: 550 /pub/data/noaa/1958/722880-23152-1958.gz: No such file or directory
/pub/data/noaa/1959/722880-23152-1959.gz
ERROR: 550 /pub/data/noaa/1959/722880-23152-1959.gz: No such file or directory
/pub/data/noaa/1960/722880-23152-1960.gz
ERROR: 550 /pub/data/noaa/1960/722880-23152-1960.gz: No such file or directory
/pub/data/noaa/1961/722880-23152-1961.gz
ERROR: 550 /pub/data/noaa/1961/722880-23152-1961.gz: No such file or directory
/pub/data/noaa/1962/722880-23152-1962.gz
ERROR: 550 /pub/data/noaa/1962/722880-23152-1962.gz: No such file or directory
/pub/data/noaa/1963/722880-23152-1963.gz
ERROR: 550 /pub/data/noaa/1963/722880-23152-1963.gz: No such file or directory
/pub/data/noaa/1964/722880-23152-1964.gz
ERROR: 550 /pub/data/noaa/1964/722880-23152-1964.gz: No such file or directory
/pub/data/noaa/1965/722880-23152-1965.gz
ERROR: 550 /pub/data/noaa/1965/722880-23152-1965.gz: No such file or directory
/pub/data/noaa/1966/722880-23152-1966.gz
ERROR: 550 /pub/data/noaa/1966/722880-23152-1966.gz: No such file or directory
/pub/data/noaa/1967/722880-23152-1967.gz
ERROR: 550 /pub/data/noaa/1967/722880-23152-1967.gz: No such file or directory
/pub/data/noaa/1968/722880-23152-1968.gz
ERROR: 550 /pub/data/noaa/1968/722880-23152-1968.gz: No such file or directory
/pub/data/noaa/1969/722880-23152-1969.gz
ERROR: 550 /pub/data/noaa/1969/722880-23152-1969.gz: No such file or directory
/pub/data/noaa/1970/722880-23152-1970.gz
ERROR: 550 /pub/data/noaa/1970/722880-23152-1970.gz: No such file or directory
/pub/data/noaa/1971/722880-23152-1971.gz
ERROR: 550 /pub/data/noaa/1971/722880-23152-1971.gz: No such file or directory
/pub/data/noaa/1972/722880-23152-1972.gz
ERROR: 550 /pub/data/noaa/1972/722880-23152-1972.gz: No such file or directory
/pub/data/noaa/1973/722880-23152-1973.gz
/pub/data/noaa/1974/722880-23152-1974.gz
/pub/data/noaa/1975/722880-23152-1975.gz
/pub/data/noaa/1976/722880-23152-1976.gz
/pub/data/noaa/1977/722880-23152-1977.gz
/pub/data/noaa/1978/722880-23152-1978.gz
/pub/data/noaa/1979/722880-23152-1979.gz
/pub/data/noaa/1980/722880-23152-1980.gz
/pub/data/noaa/1981/722880-23152-1981.gz
/pub/data/noaa/1982/722880-23152-1982.gz
/pub/data/noaa/1983/722880-23152-1983.gz
/pub/data/noaa/1984/722880-23152-1984.gz
/pub/data/noaa/1985/722880-23152-1985.gz
/pub/data/noaa/1986/722880-23152-1986.gz
/pub/data/noaa/1987/722880-23152-1987.gz
/pub/data/noaa/1988/722880-23152-1988.gz
/pub/data/noaa/1989/722880-23152-1989.gz
/pub/data/noaa/1990/722880-23152-1990.gz
/pub/data/noaa/1991/722880-23152-1991.gz
/pub/data/noaa/1992/722880-23152-1992.gz
/pub/data/noaa/1993/722880-23152-1993.gz
/pub/data/noaa/1994/722880-23152-1994.gz
/pub/data/noaa/1995/722880-23152-1995.gz
/pub/data/noaa/1996/722880-23152-1996.gz
/pub/data/noaa/1997/722880-23152-1997.gz
/pub/data/noaa/1998/722880-23152-1998.gz
/pub/data/noaa/1999/722880-23152-1999.gz
/pub/data/noaa/2000/722880-23152-2000.gz
/pub/data/noaa/2001/722880-23152-2001.gz
/pub/data/noaa/2002/722880-23152-2002.gz
/pub/data/noaa/2003/722880-23152-2003.gz
/pub/data/noaa/2004/722880-23152-2004.gz
WARNING:root:unable to load report, error: Non matching lengths. Expected 1391, got 1346
WARNING:root:unable to load report, error: Non matching lengths. Expected 1425, got 1348
WARNING:root:unable to load report, error: Non matching lengths. Expected 1441, got 1348
WARNING:root:unable to load report, error: Non matching lengths. Expected 1439, got 1346
WARNING:root:unable to load report, error: Non matching lengths. Expected 866, got 853
WARNING:root:unable to load report, error: Non matching lengths. Expected 850, got 837
WARNING:root:unable to load report, error: Non matching lengths. Expected 1457, got 1348
WARNING:root:unable to load report, error: Non matching lengths. Expected 1407, got 1346
/pub/data/noaa/2005/722880-23152-2005.gz
/pub/data/noaa/2006/722880-23152-2006.gz
/pub/data/noaa/2007/722880-23152-2007.gz
/pub/data/noaa/2008/722880-23152-2008.gz
/pub/data/noaa/2009/722880-23152-2009.gz
/pub/data/noaa/2010/722880-23152-2010.gz
/pub/data/noaa/2011/722880-23152-2011.gz
/pub/data/noaa/2012/722880-23152-2012.gz
/pub/data/noaa/2013/722880-23152-2013.gz
/pub/data/noaa/2014/722880-23152-2014.gz
/pub/data/noaa/2015/722880-23152-2015.gz
/pub/data/noaa/2016/722880-23152-2016.gz
/pub/data/noaa/2017/722880-23152-2017.gz
444437 records
Saving station data to: '../data/temperatures/ISD/722880-23152-AT.h5'
#### 722880 99999 range(2000, 2004)
/pub/data/noaa/2000/722880-99999-2000.gz
/pub/data/noaa/2001/722880-99999-2001.gz
/pub/data/noaa/2002/722880-99999-2002.gz
/pub/data/noaa/2003/722880-99999-2003.gz
37887 records
Saving station data to: '../data/temperatures/ISD/722880-99999-AT.h5'
#### 999999 23152 range(1948, 1970)
/pub/data/noaa/1948/999999-23152-1948.gz
/pub/data/noaa/1949/999999-23152-1949.gz
/pub/data/noaa/1950/999999-23152-1950.gz
/pub/data/noaa/1951/999999-23152-1951.gz
/pub/data/noaa/1952/999999-23152-1952.gz
/pub/data/noaa/1953/999999-23152-1953.gz
/pub/data/noaa/1954/999999-23152-1954.gz
/pub/data/noaa/1955/999999-23152-1955.gz
/pub/data/noaa/1956/999999-23152-1956.gz
/pub/data/noaa/1957/999999-23152-1957.gz
/pub/data/noaa/1958/999999-23152-1958.gz
/pub/data/noaa/1959/999999-23152-1959.gz
/pub/data/noaa/1960/999999-23152-1960.gz
/pub/data/noaa/1961/999999-23152-1961.gz
/pub/data/noaa/1962/999999-23152-1962.gz
/pub/data/noaa/1963/999999-23152-1963.gz
/pub/data/noaa/1964/999999-23152-1964.gz
/pub/data/noaa/1965/999999-23152-1965.gz
/pub/data/noaa/1966/999999-23152-1966.gz
/pub/data/noaa/1967/999999-23152-1967.gz
/pub/data/noaa/1968/999999-23152-1968.gz
/pub/data/noaa/1969/999999-23152-1969.gz
160682 records
Saving station data to: '../data/temperatures/ISD/999999-23152-AT.h5'
Saving combined data to: 'KBUR_AT.h5'
CPU times: user 1min 9s, sys: 1.84 s, total: 1min 10s
Wall time: 4min 49s
Cleaning temperature data for  KBUR
686 duplicates
removing 159 points
removing 30 points
removing 6 points
removing 0 points
# Potentially problematic gaps: 462
Saving cleaned temp data to: ../data/temperatures/KBUR_AT_cleaned

In [ ]:

Prepare the temperature CSV files used as MedFoes input


In [4]:
def _utc_offset_seconds(offset):
    """Convert a signed 'HH:MM' UTC-offset string (e.g. '-08:00') to seconds.

    The sign applies to the whole offset, so '-03:30' yields -12600 rather than
    the -9000 that results from negating only the hour part.

    Parameters
    ----------
    offset : str
        Offset such as '-08:00', '+05:30' or '05:30'. A missing minutes part is
        treated as zero.

    Returns
    -------
    int
        Offset from UTC in seconds; negative for offsets west of UTC.
    """
    text = offset.strip()
    sign = -1 if text.startswith('-') else 1
    hours, _, minutes = text.lstrip('+-').partition(':')
    return sign * (int(hours) * 3600 + int(minutes or 0) * 60)


# Write one trimmed CSV per station from the cleaned temperature file.
for STATION_CALLSIGN, LOCAL_TIME_OFFSET, START_DATE, END_DATE in RUNSETS:

    # Convert from UTC to a *fixed offset* approximating local time (solar time would be
    # better, but this is good enough).
    # Note: not just the local timezone, because daylight-saving time is a pointless complication.
    sitetz = dateutil.tz.tzoffset(LOCAL_TIME_OFFSET, _utc_offset_seconds(LOCAL_TIME_OFFSET))

    ## Load temperature data
    tfile = os.path.join(TEMPERATURE_OUTDIR, "{}_AT_cleaned.h5".format(STATION_CALLSIGN))
    tempdf = pd.read_hdf(tfile, 'table')
    # Apply the timezone conversion (tz_convert requires the stored index to be tz-aware)
    tempdf.index = tempdf.index.tz_convert(sitetz)

    ## Save the desired date range as a CSV file
    # Note: the start dates were determined by inspecting the full temperature data
    # and excluding problematic early dates (big gaps, lots of outliers, etc.).
    # NOTE(review): END_DATE is unpacked but not applied here, so the export runs to the
    # last available sample — confirm whether the slice should be START_DATE:END_DATE.
    outfn = os.path.join(TEMPERATURE_OUTDIR, "{}_AT_cleaned_trimmed".format(STATION_CALLSIGN))
    t = tempdf.loc[START_DATE:]
    if t.empty:
        # Fail with a message that names the station instead of an IndexError on t.index[0]
        raise ValueError("No temperature data for {} on or after {}".format(STATION_CALLSIGN, START_DATE))
    print("Saving {} {} to {} : {}.csv".format(STATION_CALLSIGN, t.index[0], t.index[-1], outfn))
    t.to_csv(outfn+'.csv', index_label='datetime')


Saving KBUR 1973-01-01 00:00:00-08:00 to 2017-08-19 15:00:00-08:00 : ../data/temperatures/KBUR_AT_cleaned_trimmed.csv

In [ ]: